##9/17/2015
##Replication_Code_4
## This script reproduces Table 1: very bursty terms and
## Fig 4: profile of bursty terms over time.

# Clear the workspace so objects left over from an earlier session cannot
# interfere with this run.
rm(list=ls())
# The terms_bursty_*.csv inputs are read with paths relative to this directory.
setwd("./../data/terms_over_time") # ("C:/Users/as9934/Dropbox/HansardProject/burstiness/bjps_replication/data/terms_over_time")

# scales supplies rescale(). library() stops with an error straight away if
# the package is not installed, whereas require() would only warn and let the
# script fail later at the first rescale() call.
library(scales)

###Table 1
# The various bursty ("burst.amount") terms can be seen in the csv files, labelled:
# 1. terms_bursty_1841_6.csv
# 2. terms_bursty_1865_1.csv
# 3. terms_bursty_1885_1.csv

###Figure 4
# Figure 4 combines all such sheets, focusing on a few particular words.

# Session identifiers, in chronological order. Each row below holds the
# identifiers that share the same prefix before the underscore.
session.list <- c(
  "1832_1", "1832_2",
  "1835_1", "1835_2", "1835_3",
  "1837_1", "1837_2", "1837_3", "1837_4",
  "1841_1", "1841_2", "1841_3", "1841_4", "1841_5", "1841_6", "1841_7",
  "1847_1", "1847_2", "1847_3", "1847_4", "1847_5",
  "1852_1", "1852_2", "1852_3", "1852_4", "1852_5",
  "1857_1", "1857_2", "1857_3",
  "1859_1", "1859_2", "1859_3", "1859_4", "1859_5", "1859_6", "1859_7",
  "1865_1", "1865_2", "1865_3",
  "1868_1", "1868_2", "1868_3", "1868_4", "1868_5",
  "1874_1", "1874_2", "1874_3", "1874_4", "1874_5", "1874_6", "1874_7",
  "1880_1", "1880_2", "1880_3", "1880_4", "1880_5", "1880_6",
  "1885_1",
  "1886_1", "1886_2", "1886_3", "1886_4", "1886_5", "1886_6", "1886_7",
  "1892_1", "1892_2", "1892_3", "1892_4",
  "1895_1", "1895_2", "1895_3", "1895_4", "1895_5", "1895_6", "1895_7",
  "1900_1", "1900_2", "1900_3", "1900_4", "1900_5", "1900_6",
  "1906_1", "1906_2", "1906_3", "1906_4",
  "1910A_1"
)

# One input file name per session: terms_bursty_<session>.csv
session.list.csv <- paste0("terms_bursty_", session.list, ".csv")

 
  
  # Position of x within y: the index of the first element of y that equals x,
  # or NA when y holds no such element. When x is not supplied, the literal
  # string "word" is looked up.
  finder <- function(x = "word", y) {
    match(x, table = y)
  }
  
  
  # Collect the standardized burstiness of one word across every session file.
  #
  # word: the term to look up in the "word" column of each csv.
  #
  # For each file named in session.list.csv, the "burst.amount" column is
  # passed through rescale() (scales package; by default it maps the column's
  # range onto 0-1) and the rescaled value on the row matching `word` is kept.
  # A word that does not occur in a session is given a burstiness of 0.
  #
  # Returns an unnamed two-element list: [[1]] the word, [[2]] a numeric
  # vector with one value per session, in the order of session.list.csv.
  get.words <- function(word = "gentlemen") {

    std.burst <- vapply(session.list.csv, function(csv.file) {
      ## NB: fileEncoding seems to address "invalid multibyte string" error
      rc <- read.csv(csv.file, fileEncoding = "latin1")

      # normalize bursts within the session to allow for increased means etc
      scaled <- rescale(rc$burst.amount)
      word.burst <- scaled[match(word, rc$word)]

      # give word a burstiness of 0, if NA (i.e. not found in this session)
      if (is.na(word.burst)) 0 else word.burst
    }, numeric(1), USE.NAMES = FALSE)

    list(word, std.burst)
  }
  
  # Graphics settings shared by every panel: axis labels perpendicular to the
  # axis, cornsilk background, four panels stacked in one column, and tight
  # margins.
  par(las = 2, bg = "cornsilk1", mfrow = c(4, 1), mar = c(4, 3, 1, 1))

  # Build the x-axis labels. The last session is relabelled "1910", then
  # everything from the first underscore onwards is stripped from each name.
  slist <- session.list
  slist[length(slist)] <- "1910"
  labs <- sub("_.*", "", slist)

  # Show each distinct label only at its first occurrence; every other
  # position stays NA.
  uni.sess <- unique(labs)
  m <- match(uni.sess, labs)
  labs2 <- replace(rep(NA, length(labs)), m, uni.sess)
  
  # Draw one panel of the figure for a single word.
  #
  # word.burst: a two-element list as returned by get.words(): [[1]] the word,
  #             [[2]] its standardized burst value for each session.
  #
  # Plots the burst values as points against the session index, labels the
  # x axis with labs2, prints the word near the top-left corner and overlays
  # a loess smooth (span 0.2) in red. Returns the loess fit invisibly.
  draw.burst.panel <- function(word.burst) {
    burst <- word.burst[[2]]
    sess.idx <- seq_along(session.list)

    # The y range starts at 0.01 and yaxs="i" adds no padding, so sessions
    # where the word has a burst of 0 fall below the plotting region.
    plot(sess.idx, burst, axes = FALSE, type = "p", xlab = "",
         xaxs = "i", yaxs = "i", ylab = "Standardized Burst",
         pch = 21, col = "black", bg = "pink", cex = 1.3, ylim = c(0.01, 1))
    axis(1, at = sess.idx, labels = labs2, cex.axis = .7)
    axis(2)
    box()
    text(1, .9, labels = word.burst[[1]], pos = 4, font = 2, cex = 1.2)

    smooth <- loess(burst ~ sess.idx, span = .2)
    lines(smooth$x, predict(smooth), type = "l", col = "red", lwd = 2)

    invisible(smooth)
  }

  #### tariff
  tariff <- get.words("tariff")
  fit <- draw.burst.panel(tariff)

  #### zulu
  zulu <- get.words("zulu")
  fit <- draw.burst.panel(zulu)

  #### ireland
  ireland <- get.words("ireland")
  fit <- draw.burst.panel(ireland)

  #### gentlemen
  gentlemen <- get.words("gentlemen")
  fit <- draw.burst.panel(gentlemen)